** Clean QCEW raw files for controls and interaction variables 
** JHL 

*************************************
** Set up workspace
*************************************
version 14.0
clear all
set maxvar 32767, perm 
set more off

cd "${path_home}"
adopath + ../programs 

** log using "${path_log}/c03_qcew_county", text replace

*************************************
** Start work here
*************************************
timer on 1 

*************************************
** [1] Convert from csv to dta for each year, parallelized 
*************************************

foreach i of numlist 2004/2015 {

	import delimited using "${path_big_dta}/qcew/`i'.q1-q4.singlefile.csv", clear

	save "${path_big_dta}/qcew/`i'_q_singlefile", replace 

	* Aggregation level: County, Total covered
	keep if agglvl_code==70 

	save "${path_big_dta}/qcew/`i'_q_county", replace
}
	
*************************************
** [2] Append dta files 
*************************************

clear
foreach i of numlist 2004/2015 {
	append using "${path_big_dta}/qcew/`i'_q_county"
}

gen fips_state_code=int(area_fips/1000) 
gen fips_county_code=area_fips-fips_state_code*1000

rename qtr quarter 

gen state = fips_state_code 
merge m:1 state using "${path_big_dta}/us_states/state_regions", keep(master match) nogen 

save "${path_big_dta}/qcew/qcew_q_county_0415", replace 


*************************************
** [3] Rename variables in qcew for county aggregates
*************************************

use "${path_big_dta}/qcew/qcew_q_county_0415", clear
drop lq_disclosure_code-oty_avg_wkly_wage_pct_chg disclosure_code industry_code 

foreach v of varlist qtrly_estabs-avg_wkly_wage {
	rename `v' `v'_county
}

save "${path_big_dta}/qcew/qcew_county_0415", replace

*************************************
** [4] Create county earning quartiles, by fips x yq 
*************************************

use "${path_big_dta}/qcew/qcew_q_county_0415", clear
gen fips = area_fips 

** Drop unknown or undefined, QCEW includes some people who live in the state but in "undefined" counties 
drop if fips_county_code == 999 


gen yq=yq(year,quarter)
format yq %tq
tsset area_fips yq
gen logaw=log(avg_wkly_wage)
gen d1logaw=d1.logaw

** Use VZ state MW data 
cap noi gen state = fips_state_code 
gen statefips=fips_state_code
gen quarterly_date = yq(year,quarter)

merge m:1 statefips quarterly_date using "${path_big_dta}/mw/VZ/VZ_state_stata/VZ_state_quarterly.dta", keep(match) nogen 

** One weekly, one hourly, normalize by 40 hour work week 
gen mw_over_avg_w_county = (mean_mw*40) / avg_wkly_wage 
gen kaitz_c = mw_over_avg_w_county 

**************************************************************
** [4.2] Save version with kaitz and quantiles, panel in wide form, county x yq, quantiles calculated by yq
**************************************************************

** For each year quarter specified, gen kaitz variable 
	** Can only specify pre-periods: 2005-2007, 2013-2014 
foreach y of numlist 2004/2015 {
	foreach q of numlist 1 4 {
		gen temp = mw_over_avg_w_county if year == `y' & quarter == `q' 
		bys area_fips: egen kaitz_y`y'_qtr`q'_c = min(temp) 
		drop temp
	}
}

	* Likewise, gen avg_wkly_wage 
	foreach y of numlist 2004/2015 {
		foreach q of numlist 1 4 {
			gen temp = avg_wkly_wage if year == `y' & quarter == `q' 
			bys area_fips: egen avgwklyw_y`y'_qtr`q'_c = min(temp) 
			drop temp
		}
	}

egen m_avg_wkly_wage_county=mean(avg_wkly_wage), by(area_fips)
egen m_mw_over_avg_w_county = mean(mw_over_avg_w_county), by(area_fips)

duplicates drop area_fips, force

** Create quantiles by yq 
	** Works since we have unique area_fips 
foreach q of numlist 2 4 {
	foreach y of numlist 2004/2015 {
		foreach qtr of numlist 1 4 {
			xtile kaitz_y`y'_qtr`qtr'_q`q'_c = kaitz_y`y'_qtr`qtr'_c, nq(`q')
			xtile avgwklyw_y`y'_qtr`qtr'_q`q'_c = avgwklyw_y`y'_qtr`qtr'_c, nq(`q')
		}
	}
}

xtile m_avg_wkly_wage_county_q4 = m_avg_wkly_wage_county, nq(4) 
xtile m_mw_over_avg_w_county_q4 = m_mw_over_avg_w_county, nq(4)

xtile m_avg_wkly_wage_county_q2 = m_avg_wkly_wage_county, nq(2) 
xtile m_mw_over_avg_w_county_q2 = m_mw_over_avg_w_county, nq(2)

keep area_fips fips_state_code fips_county_code kaitz* avgwklyw* m_avg_wkly_wage_county* m_mw_over_avg_w_county*

cap gen logaw_y2006_qtr1_c = log(avgwklyw_y2006_qtr1_c)	

save "${path_big_dta}/qcew/qcew_county_wage_q24_0415", replace

*************************************
** [5] Keep Nielsen industries in QCEW data, create channel codes 
*************************************

foreach i of numlist 2004/2015 {
	use "${path_big_dta}/qcew/`i'_q_singlefile", clear
	* 4-digit NAICS, keep relevant industries 
	keep if industry_code=="4451"|industry_code=="44511"|industry_code=="44512"|industry_code=="4461"|industry_code=="4521"|industry_code=="4529"|industry_code=="722"|industry_code=="7225"|industry_code=="722511"|industry_code=="722513"
	* Private 
	keep if own_code==5
	keep area_fips industry_code year qtr disclosure_code qtrly_estabs avg_wkly_wage lq_disclosure_code lq_qtrly_estabs month1_emplvl month2_emplvl month3_emplvl
	save "${path_big_dta}/qcew/qcew_444572_`i'", replace
}

clear 
foreach i of numlist 2004/2015 {
	append using "${path_big_dta}/qcew/qcew_444572_`i'"
}

drop if area_fips==.

gen fips_state_code=int(area_fips/1000)
gen fips_county_code=area_fips-fips_state_code*1000

rename qtr quarter

save "${path_big_dta}/qcew/qcew_444572_0415", replace

gen channel_code="D" if industry_code=="4461"
replace channel_code="F" if industry_code=="4451"
replace channel_code="M" if industry_code=="4521" 

drop if channel_code=="" 

save "${path_big_dta}/qcew/qcew_DFM_0415", replace

*************************************
** Close workspace
*************************************
timer off 1
timer list 1
** log close